* Close any log named sublog left open by an earlier run; -capture- suppresses
* the error that -log close- raises when no such log is open
capture log close sublog
* Start a fresh plain-text log for this script, overwriting any existing file
log using "$projdir/log/6_treated_donor_cty.txt", name(sublog) text replace

*-------------------------------------------------------------------------------
* Description: Build the datasets of treated and donor counties (QCEW and MCD)
* Author: Denis Sosinskiy
*
* Updated: July 23, 2023
*-------------------------------------------------------------------------------
clear

* Create tempfiles (-tempfile- accepts several names in one call).
* NOTE(review): only core and core2 are referenced by the active code in this
* file; core0, core1, core3 and core4 look unused — confirm before removing.
tempfile core core0 core1 core2 core3 core4

*--------------------------------------
* QCEW
*--------------------------------------

* Set start and end quarters. Values are Stata quarterly dates (quarters since
* 1960q1), so 199 = 2009q4 and 251 = 2022q4.
* NOTE(review): trp (218 = 2014q3) is not referenced by the active code in this
* file; presumably the treatment period — confirm.
local trp = 218
local start_tm = 199
local end_tm = 251
* Number of quarters in the window, both endpoints included
local tot_tm = `end_tm' - `start_tm' + 1
* Label for the end quarter: "2022Q4" when the window ends at 251, else "2019Q4"
local endqtrlab "2019Q4"
if `end_tm' == 251 {
	local endqtrlab "2022Q4"
}

* Ensure the user-written -distinct- command is available. Install from SSC
* only when it is missing, so that a machine which already has it needs no
* network access, and a failed install stops the script here instead of being
* silently ignored.
capture which distinct
if _rc {
	ssc install distinct
}

* Load the cleaned county-level analysis panel
use "$projdir/dta/build/cln/analysis_panel_cty.dta", clear

* Give the key variables the names used in the rest of this script
quietly {
	rename quarterly_date tm
	rename earnings demean_lnearn
	rename lnemp demean_lnemp
	rename ind naics
}
* Display tm as a quarterly date
format %tq tm

* Build a unique county id (state FIPS * 1000 + county FIPS) and discard rows
* where the id, the weekly wage, or employment is missing or where the wage or
* employment is zero
quietly generate cty_fips = 1000*statefips + countyfips
quietly drop if missing(cty_fips, avg_wkly_wage, employment) | avg_wkly_wage == 0 | employment == 0
* Sanity checks on the filter above: ids, wages and employment must be nonzero
* and wages and employment must be nonmissing
assert cty_fips != 0
assert avg_wkly_wage != 0
assert employment != 0
assert !missing(avg_wkly_wage, employment)

* Map NAICS code 722211 to 722513 for years up to and including 2011.
* Only this one code is remapped (presumably the limited-service restaurant
* code before and after the NAICS revision — confirm).
quietly replace naics = 722513 if naics == 722211 & year <= 2011

* Retain only NAICS 10, 722, and 722513
quietly drop if !inlist(naics, 10, 722, 722513)

* Order rows by county, quarter, and industry
sort cty_fips tm naics

* Restrict to the quarters between start_tm and end_tm (inclusive)
drop if !inrange(tm, `start_tm', `end_tm')

* Identify treated counties: every county id observed in state FIPS 6 or 36
* (California and New York)
levelsof cty_fips if statefips == 6 | statefips == 36, local(trcty)
* Turn the space-separated list into a comma-separated one for inlist().
* The macro extended function edits the macro text directly instead of going
* through a string expression, so the list cannot be cut short by
* string-expression length limits.
local trctylist : subinstr local trcty " " ",", all
* Number of treated counties = number of distinct ids returned by -levelsof-
* (counted before the problem counties below are dropped)
local trcty_count : word count `trcty'

* Drop counties with data issues
qui drop if cty_fips == 13121 // Fulton County, GA has an issue in the raw earnings data
qui drop if cty_fips == 36029 //  Erie County has a weird jump in the earnings data

* Generate donor variable: 1 for every county not in the treated list, 0 otherwise
qui gen donor = !inlist(cty_fips, `trctylist')

* Restrict to donor counties with no min wage changes (and the treated counties).
* A county has no change when the max and min of loc_max_mw over its rows agree.
qui egen double max_loc_max = max(loc_max_mw), by(cty_fips)
qui egen double min_loc_max = min(loc_max_mw), by(cty_fips)
* Flag rows of NAICS 722 with min_emp of at least 5000.
* NOTE(review): a missing min_emp also satisfies >= 5000 in Stata — confirm
* min_emp is never missing here.
qui gen x = (naics == 722 & min_emp >= 5000)
* A county is kept if any of its rows carries the flag
bysort cty_fips: egen empkeep = max(x)
qui keep if (max_loc_max == min_loc_max | donor == 0) & empkeep == 1
* When the window extends past 239 (2019q4), also drop state FIPS 25 and 51
* (NOTE(review): reason not stated in this file — confirm)
if `end_tm' > 239 {
	qui drop if inlist(floor(cty_fips/1000), 25, 51)
}

* Store the restricted panel so it can be reloaded for the second collapse
quietly compress
quietly save "`core'", replace

* Wage side: apply the county change adjustments, then take employment-weighted
* means of avg_wkly_wage (plus donor, year, qtr) by county, quarter and industry
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (mean) avg_wkly_wage donor year qtr [aweight=employment], by(cty_fips tm naics)
* The averaged donor flag must still be exactly 0 or 1
assert donor == 0 | donor == 1

* Park the wage-side result
quietly save "`core2'", replace

* Go back to the restricted panel
quietly use "`core'", clear

* Employment side: apply the same county change adjustments, then total
* employment and pop10 by county, quarter and industry
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (sum) employment pop10, by(cty_fips tm naics)

* Combine the employment totals with the wage-side means
quietly merge 1:1 cty_fips tm naics using "`core2'", nogenerate noreport

* Balanced panel: keep a county-industry pair only if it has one row for every
* quarter in the window (tot_tm rows in total)
quietly keep if inrange(tm, `start_tm', `end_tm')
by cty_fips naics, sort: generate ct = _N
quietly drop if ct != `tot_tm'

* Attach the LAUS data by county and year, retaining matched rows only
quietly merge m:1 cty_fips year using "$projdir/dta/build/cln/laus_cty_NYCnocombine.dta", nogenerate noreport keep(match)

* Reduce to one row per county: take quarter 216 (2014q1) of NAICS 722513 and
* keep just the county id
quietly keep if tm == 216 & naics == 722513
quietly keep cty_fips
* Each county id must now appear exactly once
by cty_fips, sort: assert _N == 1

* A county is treated when the state part of its id (id / 1000, rounded down)
* is 6 or 36
local is_treated "inlist(floor(cty_fips/1000), 6, 36)"

preserve
	* Donor list: remove the treated counties and write the remainder to disk
	quietly drop if `is_treated'
	save "$projdir/dta/build/cln/donor_cty.dta", replace
restore

* Treated list: retain only the treated counties and write them to disk
quietly keep if `is_treated'
save "$projdir/dta/build/cln/treated_cty.dta", replace

*--------------------------------------
* McDonald's data 
*(Not implemented in the replication package)
*--------------------------------------
/*
clear all

* Load the MCD data
use "$projdir/dta/build/src/MR_fips_McW.dta", clear

* Clean data
assert year
assert fips
assert hwage 
assert BMPH

keep year fips

gen statefips = floor(fips/1000)

* Ensure balanced panel
xtset fips year

gen gap = (year != 2016 & year != . & L.year == .) 
by fips: egen gap_ever = max(gap)

by fips: gen dup = cond(_N==1,0,_n)
by fips: egen tot_yrs = max(dup)
replace tot_yrs = 1 if tot_yrs == 0
drop dup

gen tm2016_2022 = (tot_yrs == 7)
by fips: egen last_yr = max(year)
gen tm2016_2021 = 0
by fips: replace tm2016_2021 = 1 if tot_yrs == 7 | (tot_yrs == 6 & last_yr == 2021)

gen tm2016_2020 = 0
by fips: replace tm2016_2020 = 1 if (tm2016_2022 + tm2016_2021 > 0) ///
| (tot_yrs == 5 & last_yr == 2020)

rename fips cty_fips

merge m:1 cty_fips using "$projdir/dta/build/cln/treated_cty.dta", keep(1 3)
gen treat = (_merge == 3)
drop _merge

merge m:1 cty_fips using "$projdir/dta/build/cln/donor_cty.dta", keep(1 3)
gen donor = (_merge == 3)
drop _merge

drop if donor + treat == 0 

keep if tm2016_2022 == 1

bysort cty_fips:  gen dup = cond(_N==1,0,_n)
drop if dup > 1
keep cty_fips

save "$projdir/dta/build/cln/mcd_cty_subsample.dta", replace
*/

* Close the log named sublog that was opened at the top of this script
log close sublog
